In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
import scipy
import numpy as np
from scipy import stats
import os
import re
import math
In [3]:
%matplotlib inline
#plt.style.use('ggplot')
plt.style.use('seaborn-darkgrid')
#sns.set_context("paper", font_scale=4.0)
sns.set_context("paper", font_scale=2.0)
#sns.set_palette("cubehelix", 8)
sns.set_palette("Paired")
# http://www.r-graph-gallery.com/38-rcolorbrewers-palettes/
In [4]:
%cd /projects/kesara/collaborators/Peter/AS-SD  
%pwd
/projects/kesara/collaborators/Peter/AS-SD
Out[4]:
u'/projects/kesara/collaborators/Peter/AS-SD'

Gene Expression Analysis

In [5]:
# DESeq: this script reads DESeq ouput files for all 45 tissues (male vs female) and creates table and dataframe 
# of p-value and log2FC. Further, using p-value matrix, this script also generates combined p-value and zscore of genes.

def dframe(line):
    tissue = os.path.basename(line.strip())
    tissue = re.sub(r"DE_result_|.txt","", tissue)
    print tissue
    df = pd.read_table(line.strip())
    df.reset_index(level=0, inplace=True)
    
    # p-values
    p = df[['index','P.Value']] 
    p = p.rename(columns={'index':'Gene', 'logFC':tissue})
    #print p.head()

    # filter sig. DE genes based on p-value and logFC
    #de = df[(df['adj.P.Val']<=0.1) & (abs(df['logFC']>=0.575))] 
    de = df[df['adj.P.Val']<=0.1] 
    de = de[['index','logFC']]
    de = de.rename(columns={'index':'Gene', 'logFC':tissue})

    return de,p
    
# read DESeq ouptut files
f = open ('downloads/guy/deseq/deseq_list.txt','r')
line = f.readline()
fc,p = dframe(line)
for line in f:
    df_fc, df_p = dframe(line.strip())
    fc = pd.merge(fc, df_fc, how='outer', on='Gene')
    p = pd.merge(p, df_p, how='outer', on='Gene')
f.close()

 
#fc = fc.dropna(how='all')
#p = p.dropna(how='all')
#p = p.fillna(1) # for tissues where genes were not filtered out due to few reads
p = p.set_index('Gene')

### write FC 
#fc['NS'] = fc.isnull().sum(axis=1)
#fc = fc.sort_values(by=['NS'], ascending=True)
fc.to_csv('results/deseq/DE_Sig.FDR.1.tsv',sep='\t',index=False)

# combined pval (order statistics)
p['ret'] =   p.apply(np.asarray,dtype=float).apply(lambda x: scipy.stats.combine_pvalues(x, method='fisher', weights=None), axis=1)
p['zscore'] = p['ret'].apply(lambda x: x[0])
p['comb_p'] = p['ret'].apply(lambda x: x[1])
p = p.drop(['ret'], axis=1)
p = p.dropna(how='any')
#p = p.sort_values(by=['comb_p'], ascending=True)
p = p.sort_values(by=['zscore'], ascending=False)

p.to_csv('results/deseq/comb_pvalue.tsv',sep='\t')
p.loc[p['comb_p']<=0.1, ['zscore']].to_csv('results/deseq/comb_pvalue_10_zscore.rnk',sep='\t')

#print 'top 10 genes..'
#print p[['zscore', 'comb_p']].head(50)
#p['zscore'].to_csv('results/deseq/ranked_gene_zscore.rnk',sep='\t')
#print "Search"
#print p.loc['FAM86C1']

### RBPs
rbp = pd.read_table('ref/RBP_short_list_Human_Mouse.txt')
rbp_fc = fc[fc.Gene.isin(rbp['HumanGeneName'].tolist())]
rbp_fc.to_csv('results/deseq/DE_Sig.RBP.FDR.1.tsv',sep='\t',index=False)
Adipose - Subcutaneous
Adipose - Visceral (Omentum)
Adrenal Gland
Artery - Aorta
Artery - Coronary
Artery - Tibial
Bladder
Brain - Amygdala
Brain - Anterior cingulate cortex (BA24)
Brain - Caudate (basal ganglia)
Brain - Cerebellar Hemisphere
Brain - Cerebellum
Brain - Cortex
Brain - Frontal Cortex (BA9)
Brain - Hippocampus
Brain - Hypothalamus
Brain - Nucleus accumbens (basal ganglia)
Brain - Putamen (basal ganglia)
Brain - Spinal cord (cervical c-1)
Brain - Substantia nigra
Breast - Mammary Tissue
Cells - EBV-transformed lymphocytes
Cells - Transformed fibroblasts
Colon - Sigmoid
Colon - Transverse
Esophagus - Gastroesophageal Junction
Esophagus - Mucosa
Esophagus - Muscularis
Heart - Atrial Appendage
Heart - Left Ventricle
Kidney - Cortex
Liver
Lung
Minor Salivary Gland
Muscle - Skeletal
Nerve - Tibial
Pancreas
Pituitary
Skin - Not Sun Exposed (Suprapubic)
Skin - Sun Exposed (Lower leg)
Small Intestine - Terminal Ileum
Spleen
Stomach
Thyroid
Whole Blood
In [7]:
p.shape
p.tail()
p.head(20)
Out[7]:
P.Value_x P.Value_y P.Value_x P.Value_y P.Value_x P.Value_y P.Value_x P.Value_y P.Value_x P.Value_y ... P.Value_y P.Value_x P.Value_y P.Value_x P.Value_y P.Value_x P.Value_y P.Value zscore comb_p
Gene
KDM6A 1.697615e-86 8.700817e-44 8.418479e-32 1.583914e-68 1.422165e-39 2.081794e-74 0.072042 5.566649e-06 3.068227e-07 3.703851e-16 ... 2.740419e-31 2.799118e-34 3.023185e-49 8.089217e-10 2.529175e-39 9.746776e-26 3.088716e-92 5.436907e-25 7242.298995 0.000000e+00
ZFX 3.160103e-51 6.914771e-34 5.697382e-31 1.307359e-52 2.050077e-40 1.101438e-59 0.218377 5.032751e-09 1.593278e-11 2.080004e-18 ... 6.925164e-12 9.915573e-41 9.465407e-43 2.396679e-21 6.987599e-30 1.311722e-17 4.900500e-53 1.873269e-17 6121.463733 0.000000e+00
KDM5C 5.557365e-53 3.829372e-36 1.050517e-35 3.170415e-29 3.635654e-18 7.062087e-34 0.000635 9.393820e-07 3.885729e-06 1.128232e-14 ... 9.807154e-15 4.560234e-24 1.562960e-45 7.788687e-16 6.672756e-27 5.425869e-34 9.001807e-60 1.089660e-09 4680.053949 0.000000e+00
JPX 1.128168e-42 1.305889e-23 4.566258e-17 8.207106e-55 4.726250e-28 1.063146e-55 0.047387 8.598633e-10 6.952024e-13 3.918816e-25 ... 1.909844e-22 5.204442e-14 7.831381e-11 9.344406e-18 2.955397e-20 7.187212e-29 3.123029e-39 1.005710e-14 4598.536428 0.000000e+00
HDHD1 1.246330e-26 2.198223e-22 4.434365e-30 8.852395e-27 2.919467e-10 1.962597e-12 0.104203 2.572721e-11 1.843307e-12 7.651718e-23 ... 3.301007e-11 1.981487e-22 1.633126e-31 1.115716e-07 2.019444e-28 2.454024e-18 5.390535e-32 4.140431e-10 3839.158827 0.000000e+00
RPS4X 2.882921e-28 9.725281e-23 5.363503e-14 3.967371e-26 1.614470e-15 2.204805e-33 0.010122 8.487543e-02 1.055489e-02 5.633550e-03 ... 1.104128e-05 1.655773e-28 5.561268e-29 6.041460e-13 1.790765e-24 2.492910e-25 1.665870e-23 1.459999e-06 3328.203901 0.000000e+00
EIF1AX 2.711684e-34 8.564721e-18 1.515370e-24 9.131457e-35 1.237458e-14 8.975651e-41 0.069094 3.163325e-05 5.406459e-05 6.161079e-09 ... 9.240388e-03 1.302617e-17 5.386181e-26 1.304065e-07 9.956191e-33 3.295455e-19 2.589134e-28 4.114231e-19 3291.806318 0.000000e+00
ZRSR2 1.768599e-29 1.413051e-09 8.661415e-14 4.131557e-30 1.286701e-10 5.622497e-46 0.101941 3.626015e-01 9.811766e-02 2.118678e-04 ... 5.283065e-08 3.053518e-42 1.945400e-69 1.501088e-09 4.570118e-09 1.755596e-10 3.558505e-34 6.985558e-09 3111.820127 0.000000e+00
DDX3X 8.109139e-20 5.793221e-09 2.298833e-22 4.342606e-22 5.615456e-13 9.603221e-33 0.064037 6.159718e-07 5.821443e-09 1.332439e-10 ... 2.938911e-23 6.210342e-19 9.329672e-23 4.721078e-08 2.961664e-18 1.022196e-06 1.214745e-31 1.011758e-06 2832.342498 0.000000e+00
EIF2S3 4.114332e-21 7.060848e-15 1.031358e-07 7.286144e-23 2.300235e-14 3.881279e-34 0.201940 1.499118e-10 1.557546e-10 1.553994e-17 ... 4.981035e-12 6.704188e-20 6.454948e-17 1.429178e-07 2.930843e-22 2.944188e-05 1.201117e-19 1.998767e-17 2715.716066 0.000000e+00
CD99 1.091227e-09 3.050018e-13 3.833972e-39 7.768943e-09 4.340310e-11 1.636998e-21 0.270695 5.358675e-05 3.023502e-04 6.309481e-11 ... 7.306589e-28 1.469317e-06 3.460873e-05 2.171557e-06 1.828876e-11 6.090452e-06 2.026149e-23 8.175117e-09 2389.724768 0.000000e+00
PNPLA4 3.316870e-21 7.847173e-14 7.706412e-18 4.153494e-18 2.007312e-05 8.719112e-09 0.839397 1.671352e-08 2.688207e-02 1.118472e-14 ... 1.773805e-08 2.232932e-24 1.228643e-23 3.646181e-06 3.030954e-19 6.920068e-09 6.592725e-16 1.368420e-12 2139.411933 0.000000e+00
SYAP1 3.765115e-40 7.976440e-07 1.756158e-09 1.601338e-18 3.620247e-10 1.085615e-21 0.214605 4.356437e-06 7.670803e-05 4.270780e-06 ... 1.031159e-02 4.646310e-22 4.228538e-38 4.704694e-02 7.867520e-09 1.578965e-06 7.182218e-12 1.850683e-01 1889.077088 0.000000e+00
ZBED1 3.739893e-17 2.625333e-09 9.139653e-01 2.558308e-04 5.747308e-06 9.589505e-20 0.019327 1.394834e-06 6.534754e-10 3.246911e-09 ... 7.747209e-09 7.198851e-11 1.591394e-10 6.507393e-03 5.556785e-05 2.357911e-06 6.058777e-12 1.362068e-01 1846.291277 0.000000e+00
FRG1B 3.834946e-19 1.440109e-10 9.763403e-06 8.126580e-14 2.493566e-05 2.183407e-18 0.312963 2.339398e-03 2.633454e-05 2.719553e-05 ... 1.271687e-06 3.226906e-13 4.555351e-17 1.987628e-06 7.863415e-06 5.523328e-17 2.169476e-17 4.641243e-16 1833.101119 0.000000e+00
STS 1.897475e-23 4.096115e-08 1.160470e-13 5.506946e-38 5.065432e-10 1.210232e-04 0.208258 5.165563e-04 2.317750e-01 4.227880e-07 ... 2.925426e-06 2.956650e-13 3.296307e-23 2.993880e-06 7.739657e-08 1.761561e-12 1.363157e-08 6.636633e-05 1760.262175 8.349406e-308
TXLNG 8.764714e-14 5.612064e-06 2.653543e-04 4.192924e-17 5.877914e-06 8.628145e-17 0.122741 8.563034e-03 3.731924e-02 5.362175e-05 ... 4.840744e-05 4.651656e-20 1.273636e-25 9.611560e-05 1.078588e-17 5.341163e-05 2.216780e-19 2.206903e-08 1736.216252 7.596988e-303
SMC1A 6.953609e-13 8.378252e-12 1.762318e-16 4.081287e-09 4.449448e-05 1.085774e-08 0.382877 9.698275e-01 5.081611e-02 6.255434e-04 ... 1.652812e-10 2.146985e-11 3.659943e-12 4.305470e-08 3.822898e-17 5.017680e-06 4.950249e-20 2.911662e-16 1422.296729 1.743953e-238
GEMIN8 4.216610e-08 3.374101e-05 7.599098e-06 5.638513e-16 6.515003e-06 1.344051e-07 0.109968 1.523071e-02 1.272816e-01 1.993240e-02 ... 4.523354e-09 1.138525e-12 4.857167e-16 1.066409e-01 8.161738e-03 6.417684e-05 1.145609e-18 2.024173e-01 1338.407856 1.985586e-221
PRKX 2.653592e-17 4.402591e-02 2.973691e-03 7.771235e-04 1.242232e-02 1.152929e-01 0.963385 1.868135e-01 3.361209e-01 1.639092e-01 ... 4.072692e-01 2.236201e-27 1.755117e-40 6.039525e-06 1.116515e-12 2.019184e-07 1.208011e-09 6.531321e-05 1259.618837 1.775193e-205

20 rows × 47 columns

In [8]:
p.loc[p['comb_p']<=0.1, ['comb_p']].shape # # genes with comb_p <= 0.1 used for GSEA
# http://software.broadinstitute.org/gsea/msigdb/collections.jsp#H
Out[8]:
(8012, 1)

Top scoring genes that are differentially expressed in male vs. female comparison across 45 tissues

In [9]:
# top genes are sorted based on zscore (as calcuated by Fisher's method)
#p[['zscore', 'comb_p']].head()
gene_list = p.head(20).index.tolist()
top = fc[fc.Gene.isin(gene_list)].set_index('Gene')
top
Out[9]:
Adipose - Subcutaneous Adipose - Visceral (Omentum) Adrenal Gland Artery - Aorta Artery - Coronary Artery - Tibial Bladder Brain - Amygdala Brain - Anterior cingulate cortex (BA24) Brain - Caudate (basal ganglia) ... Nerve - Tibial Pancreas Pituitary Skin - Not Sun Exposed (Suprapubic) Skin - Sun Exposed (Lower leg) Small Intestine - Terminal Ileum Spleen Stomach Thyroid Whole Blood
Gene
KDM6A 0.621397 0.612571 0.641252 0.662887 0.676291 0.706321 NaN 0.437821 0.525613 0.575163 ... 0.659804 0.726679 0.727479 0.597885 0.586335 0.673486 0.736660 0.638417 0.706568 0.691189
KDM5C 0.479026 0.485109 0.511482 0.381768 0.457163 0.418478 NaN 0.361472 0.264624 0.402101 ... 0.482053 0.438654 0.463696 0.409263 0.455317 0.513104 0.489643 0.459318 0.546328 0.281891
ZFX 0.506273 0.509852 0.591792 0.687367 0.618131 0.627196 NaN 0.509551 0.684955 0.620180 ... 0.590732 0.496265 0.568528 0.617976 0.555304 0.615881 0.594196 0.521061 0.607886 0.578198
JPX 0.485265 0.479434 0.508810 0.626770 0.582768 0.526843 NaN 0.484708 0.550713 0.608148 ... 0.533077 0.586024 0.687027 0.364691 0.309127 0.588582 0.563968 0.556250 0.498247 0.431061
SYAP1 0.424427 0.302247 0.406008 0.387269 0.392603 0.310119 NaN 0.291938 0.237712 0.241721 ... 0.339433 0.261379 NaN 0.328772 0.368777 NaN 0.336042 0.253517 0.232942 NaN
EIF1AX 0.525233 0.499210 0.491877 0.530184 0.560199 0.520755 NaN 0.288181 0.310344 0.329001 ... 0.551036 0.555690 NaN 0.557499 0.542889 0.474024 0.713114 0.482802 0.372577 0.651234
ZRSR2 0.337531 0.318124 0.489181 0.520267 0.390085 0.527744 NaN NaN NaN NaN ... 0.412835 0.533726 0.454299 0.551365 0.541408 0.568604 0.368137 0.362942 0.499883 0.303169
RPS4X 0.400646 0.483357 0.415693 0.509715 0.459594 0.545695 NaN NaN NaN NaN ... 0.466381 0.598854 0.334298 0.581722 0.491921 0.533567 0.668636 0.525709 0.434806 0.384969
HDHD1 0.466795 0.512307 0.676708 0.708489 0.537262 0.403429 NaN 0.540776 0.515362 0.575724 ... 0.467805 0.650432 0.485608 0.470140 0.485699 0.646000 0.619689 0.866838 0.483222 0.708073
STS 0.575284 0.367339 0.616544 0.665172 0.525073 0.211310 NaN NaN NaN 0.535967 ... 0.365812 0.463996 0.519594 0.759690 0.679330 0.510554 0.430325 0.603976 0.270404 0.477525
PNPLA4 0.476379 0.486673 0.704395 0.454316 0.310833 0.290340 NaN 0.629763 NaN 0.522616 ... 0.390530 0.615476 0.430798 0.474156 0.435204 0.540473 0.601878 0.605742 0.390505 0.428362
EIF2S3 0.276233 0.280874 0.237493 0.378260 0.370900 0.443449 NaN 0.387033 0.357102 0.366419 ... 0.360481 0.187308 0.309221 0.319645 0.250387 0.321727 0.395793 0.199754 0.318195 0.272035
DDX3X 0.388197 0.421860 0.544260 0.479974 0.466131 0.453626 NaN 0.393357 0.388791 0.397418 ... 0.492197 0.496741 0.641578 0.457171 0.353438 0.382727 0.459672 0.377442 0.421006 0.361993
FRG1B -0.495596 -0.443181 -0.387765 -0.432299 -0.321218 -0.518023 NaN NaN -0.739697 -0.594021 ... -0.491073 -0.374312 -0.491672 -0.543912 -0.581209 -0.477864 -0.388592 -0.651325 -0.486616 -0.587567
PRKX 0.313277 NaN NaN 0.222901 NaN NaN NaN NaN NaN NaN ... 0.586511 NaN NaN 0.466059 0.491488 0.654992 0.375519 0.343481 0.483140 0.387294
ZBED1 -0.249612 -0.233761 NaN -0.155875 -0.300661 -0.320247 NaN -0.431830 -0.515011 -0.417695 ... -0.411217 -0.200789 -0.313008 -0.315976 -0.241900 NaN -0.182057 -0.191533 -0.242271 NaN
TXLNG 0.409696 0.332284 0.187557 0.351132 0.332329 0.400202 NaN NaN NaN 0.246094 ... 0.389145 0.329856 0.230601 0.490181 0.482398 0.277092 0.457207 0.227270 0.333102 0.396596
SMC1A 0.219103 0.340899 0.371323 0.260458 0.287648 0.218755 NaN NaN NaN NaN ... 0.269528 0.203103 0.260200 0.271125 0.225868 0.355373 0.418991 0.235235 0.306370 0.346926
CD99 -0.231767 -0.322881 -0.807974 -0.231778 -0.403807 -0.421547 NaN -0.760816 NaN -0.958131 ... -0.521892 -0.200606 -0.918291 -0.260600 -0.185170 -0.449447 -0.400591 -0.489791 -0.433831 -0.396605
GEMIN8 0.144322 0.151025 0.183405 0.312268 0.240158 0.157600 NaN NaN NaN NaN ... 0.425486 0.218762 0.339992 0.264205 0.260206 NaN NaN 0.174474 0.291301 NaN

20 rows × 45 columns

In [10]:
plt.figure(figsize=(20,10))
sns.heatmap(top, cmap="Spectral", vmin=-1, vmax=1)
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fac2df22f50>
In [15]:
# top 20 genes; but transform -0.3 to 0.3 logFC as NaN
top_2 = top.apply(lambda x: [i if abs(i) > 0.3 else np.nan for i in x])
plt.figure(figsize=(20,10))
sns.heatmap(top_2, cmap="Spectral", vmin=-1, vmax=1)
Out[15]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fac1a312f50>

Differential Expression of RBPs between Male and Females Tissues

  • DESeq Results (adj <= 0.10)
  • Heatmap for RBPs which are found to be differentially expressed in at least one of 45 tissues common to male amd female
  • Scale shows log2 fold change as determined by DESeq
  • Grey area in heatmap denotes the tissues with non-siginicant differential expression (NaN) (padj>0.05)
  • ZRSR2: In the heatmap, ZRSF2 is the only splicing factor that is differentially expressed between male and female across multiple tissues. PSSM for this RBP gene is unavaiable in RBPmap database.
In [68]:
p['comb_p'].plot(kind='hist')
Out[68]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f35244bd1d0>
In [18]:
rbp = pd.read_table('results/deseq/DE_Sig.RBP.FDR.1.tsv') # FDR 10%
rbp = rbp.set_index('Gene')
rbp = rbp.iloc[:,0:-1]
rbp.head()
Out[18]:
Adipose - Subcutaneous Adipose - Visceral (Omentum) Adrenal Gland Artery - Aorta Artery - Coronary Artery - Tibial Bladder Brain - Amygdala Brain - Anterior cingulate cortex (BA24) Brain - Caudate (basal ganglia) ... Muscle - Skeletal Nerve - Tibial Pancreas Pituitary Skin - Not Sun Exposed (Suprapubic) Skin - Sun Exposed (Lower leg) Small Intestine - Terminal Ileum Spleen Stomach Thyroid
Gene
ZRSR2 0.337531 0.318124 0.489181 0.520267 0.390085 0.527744 NaN NaN NaN NaN ... 0.364496 0.412835 0.533726 0.454299 0.551365 0.541408 0.568604 0.368137 0.362942 0.499883
SRPK3 -0.274534 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.212158
PRPF40B -0.118227 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
SRSF1 -0.067174 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
HNRNPA1L2 -0.099441 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 44 columns

In [19]:
rbp.shape
Out[19]:
(70, 44)
In [20]:
#from matplotlib.colors import LogNorm
plt.subplots(figsize=(25,25))
#sns.heatmap(rbp, cmap="YlGnBu")
#sns.heatmap(rbp, cmap="PiYG")
sns.heatmap(rbp, cmap="Spectral", vmin=-0.5, vmax=0.5)
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fac1a028ed0>
In [21]:
# suggestions from Guru: set logFC from +0.3 to - 0.3 as Nan, and scale down logFC > 0.5 to 0.5 for the better heatmap visualization
rbp_2 = rbp.apply(lambda x: [0.5 if abs(i) > 0.5 else i for i in x])
rbp_2 = rbp.apply(lambda x: [i if abs(i) > 0.3 else np.NaN for i in x])
rbp_2.head()
Out[21]:
Adipose - Subcutaneous Adipose - Visceral (Omentum) Adrenal Gland Artery - Aorta Artery - Coronary Artery - Tibial Bladder Brain - Amygdala Brain - Anterior cingulate cortex (BA24) Brain - Caudate (basal ganglia) ... Muscle - Skeletal Nerve - Tibial Pancreas Pituitary Skin - Not Sun Exposed (Suprapubic) Skin - Sun Exposed (Lower leg) Small Intestine - Terminal Ileum Spleen Stomach Thyroid
Gene
ZRSR2 0.337531 0.318124 0.489181 0.520267 0.390085 0.527744 NaN NaN NaN NaN ... 0.364496 0.412835 0.533726 0.454299 0.551365 0.541408 0.568604 0.368137 0.362942 0.499883
SRPK3 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
PRPF40B NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
SRSF1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
HNRNPA1L2 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 44 columns

In [23]:
plt.subplots(figsize=(25,25))
sns.heatmap(rbp_2, cmap="Spectral", vmin=-0.5, vmax=0.5)
Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fac19bc3c90>

Differential Expression of Genes between Male and Females Tissues

  • DESeq Results (adj <=0.1)
  • Heatmap for top genes which are found to be differentially expressed (adj <=0.05) in across many tissues.
  • Scale shows log Fold Change as determined by DESeq
In [31]:
#rbp.loc['ZRSR2']
rbp.loc['QKI']
Out[31]:
Adipose - Subcutaneous                       0.084137
Adipose - Visceral (Omentum)                      NaN
Adrenal Gland                                     NaN
Artery - Aorta                                    NaN
Artery - Coronary                                 NaN
Artery - Tibial                                   NaN
Bladder                                           NaN
Brain - Amygdala                                  NaN
Brain - Anterior cingulate cortex (BA24)          NaN
Brain - Caudate (basal ganglia)                   NaN
Brain - Cerebellar Hemisphere                     NaN
Brain - Cerebellum                                NaN
Brain - Cortex                                    NaN
Brain - Frontal Cortex (BA9)                      NaN
Brain - Hippocampus                               NaN
Brain - Hypothalamus                              NaN
Brain - Nucleus accumbens (basal ganglia)         NaN
Brain - Putamen (basal ganglia)                   NaN
Brain - Spinal cord (cervical c-1)                NaN
Brain - Substantia nigra                          NaN
Breast - Mammary Tissue                     -0.338184
Cells - EBV-transformed lymphocytes               NaN
Cells - Transformed fibroblasts                   NaN
Colon - Sigmoid                                   NaN
Colon - Transverse                                NaN
Esophagus - Gastroesophageal Junction             NaN
Esophagus - Mucosa                                NaN
Esophagus - Muscularis                            NaN
Heart - Atrial Appendage                          NaN
Heart - Left Ventricle                            NaN
Kidney - Cortex                                   NaN
Liver                                             NaN
Lung                                              NaN
Minor Salivary Gland                              NaN
Muscle - Skeletal                           -0.121364
Nerve - Tibial                                    NaN
Pancreas                                          NaN
Pituitary                                         NaN
Skin - Not Sun Exposed (Suprapubic)               NaN
Skin - Sun Exposed (Lower leg)                    NaN
Small Intestine - Terminal Ileum                  NaN
Spleen                                            NaN
Stomach                                           NaN
Thyroid                                           NaN
Name: QKI, dtype: float64

ZRSR2

  • chrX
  • Zinc Finger CCCH-Type, RNA Binding Motif And Serine/Arginine Rich 2. Pre-mRNA-binding protein required for splicing of both U2- and U12-type introns. Selectively interacts with the 3-splice site of U2- and U12-type pre-mRNAs and promotes different steps in U2 and U12 intron splicing. Recruited to U12 pre-mRNAs in an ATP-dependent manner and is required for assembly of the prespliceosome, a precursor to other spliceosomal complexes. For U2-type introns, it is selectively and specifically required for the second step of splicing.
In [14]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/ZRSR2_tpm.csv')
plt.figure(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
#ax.set_yscale('log')
ax.set_title('ZRSR2')
Out[14]:
Text(0.5,1,u'ZRSR2')

ZFX

  • chrX
  • (Zinc Finger Protein, X-Linked) is a Protein Coding gene. This gene on the X chromosome is structurally similar to a related gene on the Y chromosome. It encodes a member of the krueppel C2H2-type zinc-finger protein family. The full-length protein contains an acidic transcriptional activation domain (AD), a nuclear localization sequence (NLS) and a DNA binding domain (DBD) consisting of 13 C2H2-type zinc fingers. Studies in mouse embryonic and adult hematopoietic stem cells showed that this gene was required as a transcriptional regulator for self-renewal of both stem cell types, but it was dispensable for growth and differentiation of their progeny. Multiple alternatively spliced transcript variants encoding different isoforms have been identified, but the full-length nature of some variants has not been determined. [provided by RefSeq, May 2010]
In [15]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/ZFX_tpm.csv')
plt.figure(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
#ax.set_yscale('log')
ax.set_title('ZFX')
Out[15]:
Text(0.5,1,u'ZFX')

KDM6A

This gene is located on the X chromosome and is the corresponding locus to a Y-linked gene which encodes a tetratricopeptide repeat (TPR) protein. The encoded protein of this gene contains a JmjC-domain and catalyzes the demethylation of tri/dimethylated histone H3. Multiple alternatively spliced transcript variants have been found for this gene. [provided by RefSeq, Apr 2014]

In [16]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/KDM6A_tpm.csv')
plt.figure(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('KDM6A')
Out[16]:
Text(0.5,1,u'KDM6A')

RP13-36G14.4

Upregulatin of this gene in female in context of sex dimorphism https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5769539/

In [17]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/RP13-36G14.4_tpm.csv')
plt.figure(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('RP13-36G14.4')
Out[17]:
Text(0.5,1,u'RP13-36G14.4')

Expression of intersting 10 RBPs in Male vs. Female Tissues (from Guy)

plotted are the TPM (log) of RBPs extracted from GTEx repository

In [18]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/SNRNP70_tpm.csv')
plt.figure(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('SNRNP70')
Out[18]:
Text(0.5,1,u'SNRNP70')
In [19]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/HNRNPA1_tpm.csv')
plt.figure(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('HNRNPA1')
Out[19]:
Text(0.5,1,u'HNRNPA1')
In [20]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/YBX1_tpm.csv')
plt.figure(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('YBX1')
Out[20]:
Text(0.5,1,u'YBX1')
In [21]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/FUS_tpm.csv')
f,axes = plt.subplots(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('FUS')
Out[21]:
Text(0.5,1,u'FUS')
In [22]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/HNRNPA2B1_tpm.csv')
f,axes = plt.subplots(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('HNRNPA2B1')
Out[22]:
Text(0.5,1,u'HNRNPA2B1')
In [23]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/SF1_tpm.csv')
f,axes = plt.subplots(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('SF1')
Out[23]:
Text(0.5,1,u'SF1')
In [24]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/HNRNPK_tpm.csv')
f,axes = plt.subplots(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('HNRNPK')
Out[24]:
Text(0.5,1,u'HNRNPK')
In [25]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/PABPN1_tpm.csv')
f,axes = plt.subplots(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('PABPN1')
Out[25]:
Text(0.5,1,u'PABPN1')
In [26]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/HNRNPH1_tpm.csv')
f,axes = plt.subplots(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('HNRNPH1')
Out[26]:
Text(0.5,1,u'HNRNPH1')
In [27]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/HNRNPL_tpm.csv')
f,axes = plt.subplots(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('HNRNPL')
Out[27]:
Text(0.5,1,u'HNRNPL')
In [12]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/XIST_tpm.csv')
plt.figure(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
ax.set_yscale('log')
ax.set_title('XIST')
Out[12]:
Text(0.5,1,u'XIST')
In [13]:
tpm = pd.read_csv('/projects/kesara/collaborators/Peter/AS-SD/results/TPM/XIST_tpm.csv')
plt.figure(figsize=(25,5))
ax = sns.boxplot(x="Tissue", y="TPM", hue="Sex", data=tpm)
plt.xticks(rotation=90)
#ax.set_yscale('log')
ax.set_title('XIST')
Out[13]:
Text(0.5,1,u'XIST')